library(dplyr)
#library(psych) #for pairs.panels, but could use other packages, e.g. GGalley
library(lavaan)
library(semPlot)
library(DiagrammeR)
library(tidyr)
library(ggplot2)

Import data

combined=read.csv("data/annual_averages/annual_data_compiled_regions.csv",stringsAsFactors = F)
cnames=read.csv("analysis/column_names_region.csv", stringsAsFactors = F)
dsub=filter(combined, Year>=1975) %>% arrange(Region,Year)
focaldata=dsub[,cnames$Datacolumn]
fvars=cnames$Shortname
colnames(focaldata)=fvars
regions=unique(focaldata$region)
regionorder=c("West","North","South")
years=1975:2021

#focal variables
varnames=c("temp", "flow","nitrate","ammonia","dophos","chla","hcope","clad","amphi","pcope","mysid","potam","corbic","sside","estfish_bsot","estfish_bsmt")

source("analysis/myLavaanPlot.r")

Data prep

Log transform, scale

#log transform
logvars=fvars[cnames$Log=="yes"]
logtrans=function(x) {
  x2=x[which(!is.na(x))]
  if(any(x2==0)) {log(x+min(x2[which(x2>0)],na.rm=T))}
  else {log(x)}
}
focaldatalog = focaldata %>% 
  mutate_at(logvars,logtrans)

#scale data
fdr0=focaldatalog
tvars=fvars[-(1:2)]

fdr=fdr0 %>% group_by(region) %>% 
  #lag
  mutate_at(tvars,list("1"=lag)) %>% 
  #scale
  mutate_at(-(1:2),scale) %>% 
  ungroup() %>% 
  as.data.frame() 

#detrended data
fdr_dtr=fdr0 %>% group_by(region) %>% 
  #detrend
  mutate_at(tvars,function(x) { 
    x<<-x
    if(!all(is.na(x))) {
      if((length(which(x==0))/length(x))<0.5) {
        x2<<-x
        x2[x2==0]=NA
        res<<-residuals(lm(x2~years))
        out=x
        out[which(!is.na(x2))]=res
        return(out)
      } else {return(x)}
    } else {return(x)}
  }) %>%
  #lag
  mutate_at(tvars,list("1"=lag)) %>% 
  #scale
  mutate_at(-(1:2),scale) %>% 
  ungroup() %>% 
  as.data.frame() 

Time series plots

Cross-correlation matrices

(only sig correlations shown… no correction for multiple comparisons)

Fish indices are not correlated in S and N!

SEM model

With and without detrending.

#west has no ssides
modwest='zoop=~hcope+mysid
        fish=~estfish_bsmt+estfish_bsot
        zoop~chla+potam+flow
        chla~potam+flow
        fish~zoop+flow
'
modfitwest=sem(modwest, data=filter(fdr,region=="West"))
modfitwest_dtr=sem(modwest, data=filter(fdr_dtr,region=="West"))
summary(modfitwest, standardized=T, rsq=T)
## lavaan 0.6-4 ended normally after 26 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      12.041
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.211
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop =~                                                               
##     hcope             1.000                               0.736    0.879
##     mysid             1.118    0.142    7.899    0.000    0.823    0.899
##   fish =~                                                               
##     estfish_bsmt      1.000                               0.807    0.818
##     estfish_bsot      0.939    0.171    5.488    0.000    0.758    0.768
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop ~                                                                
##     chla              0.670    0.101    6.609    0.000    0.911    0.778
##     potam            -0.174    0.082   -2.128    0.033   -0.236   -0.236
##     flow             -0.057    0.076   -0.754    0.451   -0.078   -0.080
##   chla ~                                                                
##     potam            -0.325    0.135   -2.407    0.016   -0.325   -0.380
##     flow              0.098    0.131    0.748    0.454    0.098    0.118
##   fish ~                                                                
##     zoop              0.800    0.148    5.416    0.000    0.729    0.729
##     flow              0.379    0.088    4.317    0.000    0.469    0.484
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .hcope             0.160    0.050    3.183    0.001    0.160    0.228
##    .mysid             0.161    0.056    2.875    0.004    0.161    0.192
##    .estfish_bsmt      0.323    0.102    3.179    0.001    0.323    0.332
##    .estfish_bsot      0.400    0.109    3.653    0.000    0.400    0.410
##    .chla              0.584    0.131    4.472    0.000    0.584    0.802
##    .zoop              0.123    0.046    2.680    0.007    0.227    0.227
##    .fish              0.038    0.073    0.523    0.601    0.059    0.059
## 
## R-Square:
##                    Estimate
##     hcope             0.772
##     mysid             0.808
##     estfish_bsmt      0.668
##     estfish_bsot      0.590
##     chla              0.198
##     zoop              0.773
##     fish              0.941
summary(modfitwest_dtr, standardized=T, rsq=T)
## lavaan 0.6-4 ended normally after 22 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      13.388
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.146
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop =~                                                               
##     hcope             1.000                               0.807    0.844
##     mysid             0.869    0.164    5.310    0.000    0.701    0.779
##   fish =~                                                               
##     estfish_bsmt      1.000                               0.711    0.720
##     estfish_bsot      0.935    0.233    4.010    0.000    0.665    0.665
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop ~                                                                
##     chla              0.688    0.113    6.071    0.000    0.853    0.820
##     potam            -0.001    0.102   -0.013    0.990   -0.002   -0.002
##     flow              0.026    0.105    0.248    0.804    0.032    0.033
##   chla ~                                                                
##     potam             0.032    0.162    0.196    0.844    0.032    0.034
##     flow              0.215    0.160    1.341    0.180    0.215    0.229
##   fish ~                                                                
##     zoop              0.550    0.150    3.666    0.000    0.624    0.624
##     flow              0.429    0.105    4.106    0.000    0.603    0.618
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .hcope             0.263    0.097    2.723    0.006    0.263    0.288
##    .mysid             0.318    0.094    3.398    0.001    0.318    0.393
##    .estfish_bsmt      0.469    0.143    3.284    0.001    0.469    0.481
##    .estfish_bsot      0.557    0.151    3.698    0.000    0.557    0.558
##    .chla              0.879    0.197    4.472    0.000    0.879    0.953
##    .zoop              0.205    0.091    2.258    0.024    0.315    0.315
##    .fish              0.034    0.098    0.343    0.732    0.066    0.066
## 
## R-Square:
##                    Estimate
##     hcope             0.712
##     mysid             0.607
##     estfish_bsmt      0.519
##     estfish_bsot      0.442
##     chla              0.047
##     zoop              0.685
##     fish              0.934
# par(mfrow=c(1,2))
# semPaths(modfitwest, "std", edge.label.cex = 1, residuals = F)
# semPaths(modfitwest, "par", edge.label.cex = 1, residuals = F)

labelswest <- createLabels(modfitwest, cnames)

# residuals(modfitwest)
# modificationindices(modfitwest)
modnorth='zoop=~hcope+mysid
        fish=~estfish_bsmt+estfish_bsot
        zoop~chla+potam+flow
        chla~potam+flow
        fish~zoop+flow
'
modfitnorth=sem(modnorth, data=filter(fdr,region=="North"))
modfitnorth_dtr=sem(modnorth, data=filter(fdr_dtr,region=="North"))
summary(modfitnorth, standardized=T, rsq=T)
## lavaan 0.6-4 ended normally after 29 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      23.743
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.005
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop =~                                                               
##     hcope             1.000                               0.611    0.709
##     mysid             1.559    0.301    5.186    0.000    0.953    0.978
##   fish =~                                                               
##     estfish_bsmt      1.000                               0.556    0.563
##     estfish_bsot      0.923    0.361    2.555    0.011    0.514    0.532
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop ~                                                                
##     chla              0.553    0.130    4.256    0.000    0.904    0.790
##     potam             0.034    0.079    0.433    0.665    0.056    0.056
##     flow             -0.163    0.081   -2.010    0.044   -0.267   -0.273
##   chla ~                                                                
##     potam            -0.241    0.149   -1.625    0.104   -0.241   -0.278
##     flow              0.105    0.146    0.714    0.475    0.105    0.122
##   fish ~                                                                
##     zoop              0.728    0.247    2.955    0.003    0.801    0.801
##     flow              0.254    0.112    2.265    0.023    0.457    0.467
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .hcope             0.371    0.093    3.981    0.000    0.371    0.498
##    .mysid             0.041    0.099    0.410    0.682    0.041    0.043
##    .estfish_bsmt      0.666    0.197    3.387    0.001    0.666    0.683
##    .estfish_bsot      0.667    0.185    3.605    0.000    0.667    0.717
##    .chla              0.666    0.149    4.472    0.000    0.666    0.873
##    .zoop              0.159    0.064    2.494    0.013    0.426    0.426
##    .fish              0.065    0.130    0.502    0.616    0.211    0.211
## 
## R-Square:
##                    Estimate
##     hcope             0.502
##     mysid             0.957
##     estfish_bsmt      0.317
##     estfish_bsot      0.283
##     chla              0.127
##     zoop              0.574
##     fish              0.789
summary(modfitnorth_dtr, standardized=T, rsq=T)
## lavaan 0.6-4 ended normally after 32 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      21.057
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.012
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop =~                                                               
##     hcope             1.000                               0.455    0.533
##     mysid             1.882    0.551    3.417    0.001    0.857    0.826
##   fish =~                                                               
##     estfish_bsmt      1.000                               0.847    0.857
##     estfish_bsot      0.401    0.229    1.750    0.080    0.340    0.344
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop ~                                                                
##     chla              0.377    0.108    3.490    0.000    0.828    0.816
##     potam             0.066    0.053    1.246    0.213    0.145    0.152
##     flow             -0.220    0.080   -2.757    0.006   -0.482   -0.491
##   chla ~                                                                
##     potam             0.086    0.159    0.540    0.589    0.086    0.091
##     flow              0.230    0.164    1.405    0.160    0.230    0.238
##   fish ~                                                                
##     zoop              1.603    0.520    3.082    0.002    0.863    0.863
##     flow              0.473    0.142    3.333    0.001    0.559    0.569
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .hcope             0.523    0.123    4.255    0.000    0.523    0.716
##    .mysid             0.342    0.115    2.987    0.003    0.342    0.318
##    .estfish_bsmt      0.258    0.325    0.796    0.426    0.258    0.265
##    .estfish_bsot      0.859    0.199    4.320    0.000    0.859    0.881
##    .chla              0.925    0.207    4.472    0.000    0.925    0.953
##    .zoop              0.035    0.029    1.206    0.228    0.170    0.170
##    .fish              0.225    0.326    0.691    0.489    0.314    0.314
## 
## R-Square:
##                    Estimate
##     hcope             0.284
##     mysid             0.682
##     estfish_bsmt      0.735
##     estfish_bsot      0.119
##     chla              0.047
##     zoop              0.830
##     fish              0.686
# par(mfrow=c(1,2))
# semPaths(modfitnorth, "std", edge.label.cex = 1, residuals = F)
# semPaths(modfitnorth, "par", edge.label.cex = 1, residuals = F)

labelsnorth <- createLabels(modfitnorth, cnames)

# residuals(modfitnorth)
# modificationindices(modfitnorth)
modsouth='zoop=~hcope+mysid
        fish=~estfish_bsmt+estfish_bsot
        zoop~chla+corbic+flow
        chla~corbic+flow
        fish~zoop+flow
'
modfitsouth=sem(modsouth, data=filter(fdr,region=="South"))
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
## variances are negative
modfitsouth_dtr=sem(modsouth, data=filter(fdr_dtr,region=="South"))
## Warning in lavaan::lavaan(model = modsouth, data = filter(fdr_dtr, region
## == : lavaan WARNING: the optimizer warns that a solution has NOT been
## found!
summary(modfitsouth, standardized=T, rsq=T)
## lavaan 0.6-4 ended normally after 33 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      19.514
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.021
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop =~                                                               
##     hcope             1.000                               0.573    0.588
##     mysid             1.397    0.337    4.146    0.000    0.801    0.829
##   fish =~                                                               
##     estfish_bsmt      1.000                               0.475    0.481
##     estfish_bsot      0.994    0.495    2.007    0.045    0.472    0.499
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   zoop ~                                                                
##     chla              0.464    0.107    4.334    0.000    0.809    0.825
##     corbic            0.201    0.072    2.806    0.005    0.350    0.349
##     flow             -0.148    0.062   -2.395    0.017   -0.257   -0.269
##   chla ~                                                                
##     corbic            0.446    0.154    2.902    0.004    0.446    0.436
##     flow             -0.014    0.147   -0.093    0.926   -0.014   -0.014
##   fish ~                                                                
##     zoop              0.656    0.288    2.280    0.023    0.792    0.792
##     flow              0.058    0.107    0.539    0.590    0.122    0.127
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .hcope             0.623    0.144    4.338    0.000    0.623    0.654
##    .mysid             0.291    0.095    3.051    0.002    0.291    0.312
##    .estfish_bsmt      0.750    0.213    3.519    0.000    0.750    0.769
##    .estfish_bsot      0.671    0.199    3.380    0.001    0.671    0.751
##    .chla              0.846    0.189    4.472    0.000    0.846    0.813
##    .zoop             -0.003    0.036   -0.077    0.938   -0.008   -0.008
##    .fish              0.083    0.134    0.618    0.536    0.367    0.367
## 
## R-Square:
##                    Estimate
##     hcope             0.346
##     mysid             0.688
##     estfish_bsmt      0.231
##     estfish_bsot      0.249
##     chla              0.187
##     zoop                 NA
##     fish              0.633
summary(modfitsouth_dtr, standardized=T, rsq=T)
## lavaan 0.6-4 did NOT end normally after 220 iterations
## ** WARNING ** Estimates below are most likely unreliable
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         16
## 
##                                                   Used       Total
##   Number of observations                            40          47
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                          NA
##   Degrees of freedom                                NA
##   P-value                                           NA
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv 
##   zoop =~                                                        
##     hcope              1.000                                0.501
##     mysid              1.707       NA                       0.855
##   fish =~                                                        
##     estfish_bsmt       1.000                               68.898
##     estfish_bsot       0.000       NA                       0.000
##   Std.all 
##           
##      0.525
##      0.825
##           
##     69.760
##      0.000
## 
## Regressions:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv 
##   zoop ~                                                         
##     chla               0.417       NA                       0.833
##     corbic             0.044       NA                       0.087
##     flow              -0.149       NA                      -0.298
##   chla ~                                                         
##     corbic             0.160       NA                       0.160
##     flow              -0.032       NA                      -0.032
##   fish ~                                                         
##     zoop              -0.151       NA                      -0.001
##     flow               0.011       NA                       0.000
##   Std.all 
##           
##      0.847
##      0.087
##     -0.310
##           
##      0.157
##     -0.033
##           
##     -0.001
##      0.000
## 
## Variances:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv 
##    .hcope              0.660       NA                       0.660
##    .mysid              0.342       NA                       0.342
##    .estfish_bsmt   -4745.909       NA                   -4745.909
##    .estfish_bsot       0.942       NA                       0.942
##    .chla               1.010       NA                       1.010
##    .zoop               0.043       NA                       0.173
##    .fish            4746.878       NA                       1.000
##   Std.all 
##      0.725
##      0.319
##  -4865.453
##      1.000
##      0.977
##      0.173
##      1.000
## 
## R-Square:
##                    Estimate 
##     hcope              0.275
##     mysid              0.681
##     estfish_bsmt          NA
##     estfish_bsot       0.000
##     chla               0.023
##     zoop               0.827
##     fish               0.000
# par(mfrow=c(1,2))
# semPaths(modfitsouth, "std", edge.label.cex = 1, residuals = F)
# semPaths(modfitsouth, "par", edge.label.cex = 1, residuals = F)

labelssouth <- createLabels(modfitsouth, cnames)

# residuals(modfitsouth)
# modificationindices(modfitsouth)

Nice plots

Without covariances

Original units

West

myLavaanPlot(model=modfitwest, labels=labelswest,
                         node_options=list(shape="box", fontname="Helvetica"), 
                         coefs=TRUE, stand=TRUE, covs=FALSE, sig=0.05, 
                         width=c("regress","latent"),
                         color=c("regress","latent"))

North

South

myLavaanPlot(model=modfitsouth, labels=labelssouth,
                         node_options=list(shape="box", fontname="Helvetica"), 
                         coefs=TRUE, stand=TRUE, covs=FALSE, sig=0.05, 
                         width=c("regress","latent"),
                         color=c("regress","latent"))

Detrended

West

myLavaanPlot(model=modfitwest_dtr, labels=labelswest,
                         node_options=list(shape="box", fontname="Helvetica"), 
                         coefs=TRUE, stand=TRUE, covs=FALSE, sig=0.05, 
                         width=c("regress","latent"),
                         color=c("regress","latent"))

North

South

myLavaanPlot(model=modfitsouth_dtr, labels=labelssouth,
                         node_options=list(shape="box", fontname="Helvetica"), 
                         coefs=TRUE, stand=TRUE, covs=FALSE, sig=0.05, 
                         width=c("regress","latent"),
                         color=c("regress","latent"))
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = object@SampleStats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
## Error in JAC %*% OUT : requires numeric/complex matrix/vector arguments

With covariances

Original units

Detrended